**********************************************
***** SOEP Merging and recoding **************
**********************************************

* Root folders used throughout this script (placeholders: set both before running).
* MY_PATH_IN  is the folder the source .dta files are read from.
* MY_PATH_OUT is the folder the intermediate and final .dta files are written to.
global MY_PATH_IN   "SET_YOUR_INPATH\SOEP\cs-transfer\STATA_DEEN_v35\Stata"
global MY_PATH_OUT  "SET_YOUR_OUTPATH"



****** Loading Variables

* Person-level questionnaire file (pl.dta): read only the columns needed later.
* Identifiers come first, then the plh/pla items that are recoded further down.
#delimit ;
use
	pid cid hid syear
	plh0212 - plh0226
	plh0184 plh0185 plh0186 plh0187
	plh0001 plh0003 plh0004 plh0006 plh0007 plh0011_h plh0012_h plh0013_h
	plh0171 plh0173 plh0175 plh0176 plh0178 plh0182 plh0192
	plh0033
	plh0204_h
	pla0009_v1 pla0009_v2
	using "$MY_PATH_IN\pl.dta", clear ;
#delimit cr

* Rows without a survey year cannot be keyed, so only rows with a year are kept.
keep if syear != .

* Numeric merge keys: survey year shifted by eight decimal digits plus the id.
generate double upid = syear * 100000000 + pid
generate double uhid = syear * 100000000 + hid

save "$MY_PATH_OUT\pl.dta", replace


* Individual-level: voting variables (wide-format only)

// 2009: turnout item bap126 from the bap wave file, re-dated to 2009 so it
// merges onto the 2009 person-year rows.
// NOTE(review): by analogy with the 2013/2017 blocks this is presumably the
// 2010 wave reporting on the 2009 election - confirm.
use pid syear bap126 using "$MY_PATH_IN\raw\bap.dta", clear
replace syear = 2009
generate double upid = syear * 100000000 + pid
recode bap126 (1 = 1 "voted") (-1 2 = 0 "did not vote") (else = .), generate(vote2009)
* Only the year, the merge key and the new dummy are stored.
keep syear upid vote2009
save "$MY_PATH_OUT\vote2009.dta", replace


// 2013: use 2014 for 2013
// Item bep121 is collapsed to a turnout dummy: codes 1-27 and 30 count as
// voted, codes -1 and 28 as not voted, everything else becomes missing.
use pid syear bep121 using "$MY_PATH_IN\raw\bep.dta", clear
replace syear = 2013
generate double upid = syear * 100000000 + pid
recode bep121 (1/27 30 = 1 "voted") (-1 28 = 0 "did not vote") (else = .), generate(vote2013)
* Only the year, the merge key and the new dummy are stored.
keep syear upid vote2013
save "$MY_PATH_OUT\vote2013.dta", replace

// 2017: use 2018 for 2017
// Item bip_175 is collapsed to a turnout dummy: codes 1-27, 30 and 31 count as
// voted, codes -1 and 28 as not voted, everything else becomes missing.
use pid syear bip_175 using "$MY_PATH_IN\raw\bip.dta", clear
replace syear = 2017
generate double upid = syear * 100000000 + pid
recode bip_175 (1/27 30 31 = 1 "voted") (-1 28 = 0 "did not vote") (else = .), generate(vote2017)
* Only the year, the merge key and the new dummy are stored.
keep syear upid vote2017
save "$MY_PATH_OUT\vote2017.dta", replace



* Individual-level, generated: pgen.dta
* Columns: identifiers, education/degree (pgpsbil), employment status (pgemplst pglfs).
#delimit ;
use
	pid cid hid syear
	pgpsbil
	pgemplst pglfs
	using "$MY_PATH_IN\pgen.dta", clear ;
#delimit cr

* Keep keyed rows only, then build the person-year and household-year keys.
keep if syear != .
generate double upid = syear * 100000000 + pid
generate double uhid = syear * 100000000 + hid

save "$MY_PATH_OUT\pgen.dta", replace


* Household-level: hl.dta
* Columns: identifiers plus household income (hlc0005_h).
use cid hid syear hlc0005_h using "$MY_PATH_IN\hl.dta", clear

* Keep rows with a survey year and build the household-year key.
keep if syear != .
generate double uhid = syear * 100000000 + hid

save "$MY_PATH_OUT\hl.dta", replace


* Household-level: hgen.dta
* Columns: identifiers plus the five imputed household income variables.
#delimit ;
use
	cid hid syear
	hgi1hinc hgi2hinc hgi3hinc hgi4hinc hgi5hinc
	using "$MY_PATH_IN\hgen.dta", clear ;
#delimit cr

* Keep rows with a survey year and build the household-year key.
keep if syear != .
generate double uhid = syear * 100000000 + hid

save "$MY_PATH_OUT\hgen.dta", replace


* Household-level, brutto: hbrutto.dta
* Columns: identifiers plus household size (hhgr).
use cid hid syear hhgr using "$MY_PATH_IN\hbrutto.dta", clear

* Keep rows with a survey year and build the household-year key.
keep if syear != .
generate double uhid = syear * 100000000 + hid

save "$MY_PATH_OUT\hbrutto.dta", replace




* Parent data: bioparen.dta
* One extract without survey year: person identifiers plus the parental
* identifiers mnr and fnr. It is later attached by pid alone.
use pid cid mnr fnr using "$MY_PATH_IN\bioparen.dta", clear

save "$MY_PATH_OUT\bioparen.dta", replace



* Individual data: biobirth.dta
* One extract without survey year: person identifiers plus sex and birth
* year (gebjahr). It is later attached by pid alone.
use pid cid sex gebjahr using "$MY_PATH_IN\biobirth.dta", clear

save "$MY_PATH_OUT\biobirth.dta", replace


* Health data: health.dta
* Columns: identifiers plus the physical and mental health scores.
#delimit ;
use
	pid cid syear
	mcs pcs pf_nbs rp_nbs bp_nbs gh_nbs vt_nbs sf_nbs re_nbs mh_nbs
	using "$MY_PATH_IN\health.dta", clear ;
#delimit cr

* Keep rows with a survey year and build the person-year key.
keep if syear != .
generate double upid = syear * 100000000 + pid

save "$MY_PATH_OUT\health.dta", replace


* Migration status
* Person-year extract from ppathl.dta holding germborn, saved as migrant.dta
* and later attached 1:1 on upid.
use pid cid syear 			/// identifier
	germborn 				///
		using "$MY_PATH_IN\ppathl.dta", clear

* Same guard as the other person-year extracts: a row without survey year
* would receive a missing upid, and several such rows would make upid
* non-unique and abort the later 1:1 merge.
drop if syear==.
gen double  upid = (syear * 100000000) + pid

save "$MY_PATH_OUT\migrant.dta", replace


******* Merging data

* The person-year file is the master. Every merge below keeps master rows only
* (matched or unmatched) and does not create _merge.
use "$MY_PATH_OUT\pl.dta", clear

* Turnout extracts, one per election year, keyed on person-year.
foreach yr in 2009 2013 2017 {
	merge 1:1 upid using "$MY_PATH_OUT\vote`yr'.dta", keep(master match) nogenerate
}

* One turnout variable: the election-specific dummy in its own survey year,
* missing in all other years.
generate vote = .
foreach yr in 2009 2013 2017 {
	replace vote = vote`yr' if syear == `yr'
}
*keep if syear==2009 | syear==2013 | syear==2017

* Person-year extracts (1:1 on upid).
merge 1:1 upid using "$MY_PATH_OUT\pgen.dta", keep(master match) nogenerate
merge 1:1 upid using "$MY_PATH_OUT\health.dta", keep(master match) nogenerate
merge 1:1 upid using "$MY_PATH_OUT\migrant.dta", keep(master match) nogenerate

* Time-constant person extracts (many person-years to one pid).
merge m:1 pid using "$MY_PATH_OUT\bioparen.dta", keep(master match) nogenerate
merge m:1 pid using "$MY_PATH_OUT\biobirth.dta", keep(master match) nogenerate

* Household-year extracts (many persons to one uhid).
merge m:1 uhid using "$MY_PATH_OUT\hl.dta", keep(master match) nogenerate
merge m:1 uhid using "$MY_PATH_OUT\hgen.dta", keep(master match) nogenerate
merge m:1 uhid using "$MY_PATH_OUT\hbrutto.dta", keep(master match) nogenerate

*gen unique ids
* Year-prefixed keys for father (ufnr) and mother (umnr), built the same way as
* upid: survey year shifted by eight decimal digits plus the parent id.
* Only strictly positive parent ids are turned into keys. Without the guard a
* non-positive code in fnr/mnr would yield a number that looks like a valid key.
* NOTE(review): presumably unknown parents carry negative codes like the other
* variables recoded in this script - confirm against the bioparen codebook.
gen double  ufnr = (syear * 100000000) + fnr if fnr > 0 & !missing(fnr)
gen double  umnr = (syear * 100000000) + mnr if mnr > 0 & !missing(mnr)

*save
save "$MY_PATH_OUT\soep.dta", replace


******* Recode data
* Reload the merged person-year file saved at the end of the merge step.
use "$MY_PATH_OUT\soep.dta", clear


*political attitudes
* Tabulate the raw items before recoding.
* NOTE(review): fre is not an official Stata command (presumably the user-written
* package of that name) - confirm it is installed before running.
fre plh0001 plh0003 plh0004 plh0007 plh0011_h plh0012_h plh0013_h

*recode plh0001 (1=6 "very much in favor")(2=5)(3=4)(4=3)(5=2)(6=1 "very much opposed") ///
*		(7=.a "DK")(-8=.b "not part of questionnaire")(-2=.c "does not apply")(-1=.d "refused"), gen(demo)
* Each recode below writes a new variable via gen() and leaves the source item
* untouched. Where the rules reverse the codes (1=5 ... 5=1), higher values of
* the new variable mean more / stronger. Listed negative codes become extended
* missing values (.a to .f).
* NOTE(review): values not named in any rule are copied unchanged, so a negative
* code that is not listed (for instance in plh0004 or plh0012_h) would stay
* numeric - verify against the raw tabulation above.
recode plh0003 (1=5 "definetely going to vote")(2=4)(3=3)(4=2)(5=1 "definetely not going to vote") ///
		(6=.a "not eligible")(-8=.b "not part of questionnaire")(-1=.d "refused"), gen(vote_prob)
recode plh0004 (-8=.b "not part of questionnaire")(-1=.d "refused"), gen(leftright)
recode plh0007 (1=4 "very strong")(2=3)(3=2)(4=1 "not at all") ///
		(-8=.b "not part of questionnaire")(-5=.e "not part of this version") ///
		(-2=.c "does not apply")(-1=.d "refused"), gen(polint)
* Party preference collapsed to nine groups; codes 9-25, 30 and 31 are pooled
* into one multiple-preference category.
recode plh0012_h (1=1 "SPD")(2 3 13=2 "CDU/CSU")(4=3 "FDP")(5=4 "Greens")(6=5 "Left Party") ///
		(27=6 "AfD")(7=7 "NPD/REP/RW") (8 26=8 "Other")(9/25 30 31=9 "Multiple Party Pref.") ///
		(-5=.e "not part of this version")(-4=.f "illegal multiple response") ///
		(-2=.c "does not apply")(-1=.d "refused"), gen(partyid)
* Dummy for having a party identification; code -1 is counted as no pid here.
recode plh0011_h (1=1 "has pid")(-1 2=0 "no pid")(else=.), gen(partyid01)		
recode plh0013_h (1=5 "very strong")(2=4)(3=3)(4=2)(5=1 "very weak") ///
		(-5=.e "not part of this version")(-2=.c "does not apply")(-1=.d "refused"), gen(partyid_str)
* Respondents without a party identification get strength 0.
replace partyid_str = 0 if partyid01==0
		
*lab var demo "Satisfaction with democracy"
lab var vote_prob "Propensity to vote"
lab var vote "Voted at election"
lab var leftright "Left-Right Self-Placement Scale"
lab var polint "Political Interest"
lab var partyid "Party ID"
lab var partyid_str "Party ID: Strength"

* Rescale to a common 0-10 range: vote_prob runs 1-5, polint 1-4 and
* partyid_str 0-5 (after the replace above).
*gen demo10 = (demo-1)*(10/5)
gen vote_prob10 = (vote_prob-1)*(10/4)
gen polint10 = (polint-1)*(10/3)
gen partyid_str10 = (partyid_str)*(10/5)



*Satisfaction with personal situation
* worry_econ reverses plh0033 so that 3 is the highest level of worry.
* The six sat_* variables keep the 0-10 codes of their source item, label the
* end points, and turn the listed negative codes into extended missing values.
* NOTE(review): in each sat_* recode both -5 and -3 are mapped to .e but with
* different labels ("not part of this version" / "not valid"). A value can hold
* only one label, so check which label .e ends up with and whether the two
* codes should be kept apart.
recode plh0033  (1=3 "major worries")(2=2 "some worries")(3=1 "no worries") ///
		(-2=.c "does not apply")(-1=.d "refused"), gen(worry_econ)
recode plh0171 (0=0 "low")(1=1)(2=2)(3=3)(4=4)(5=5)(6=6)(7=7)(8=8)(9=9)(10=10 "high")  ///
		(-8=.b "not part of questionnaire") (-5=.e "not part of this version")(-3=.e "not valid") ///
		(-2=.c "does not apply")(-1=.d "refused"), gen(sat_health)		
recode plh0173 (0=0 "low")(1=1)(2=2)(3=3)(4=4)(5=5)(6=6)(7=7)(8=8)(9=9)(10=10 "high")  ///
		(-8=.b "not part of questionnaire") (-5=.e "not part of this version")(-3=.e "not valid") ///
		(-2=.c "does not apply")(-1=.d "refused"), gen(sat_work)		
recode plh0175 (0=0 "low")(1=1)(2=2)(3=3)(4=4)(5=5)(6=6)(7=7)(8=8)(9=9)(10=10 "high")  ///
		(-8=.b "not part of questionnaire") (-5=.e "not part of this version")(-3=.e "not valid") ///
		(-2=.c "does not apply")(-1=.d "refused"), gen(sat_hhinc)		
recode plh0176 (0=0 "low")(1=1)(2=2)(3=3)(4=4)(5=5)(6=6)(7=7)(8=8)(9=9)(10=10 "high")  ///
		(-8=.b "not part of questionnaire") (-5=.e "not part of this version")(-3=.e "not valid") ///
		(-2=.c "does not apply")(-1=.d "refused"), gen(sat_persinc)	
recode plh0178 (0=0 "low")(1=1)(2=2)(3=3)(4=4)(5=5)(6=6)(7=7)(8=8)(9=9)(10=10 "high")  ///
		(-8=.b "not part of questionnaire") (-5=.e "not part of this version")(-3=.e "not valid") ///
		(-2=.c "does not apply")(-1=.d "refused"), gen(sat_leisuretime)
recode plh0182 (0=0 "low")(1=1)(2=2)(3=3)(4=4)(5=5)(6=6)(7=7)(8=8)(9=9)(10=10 "high")  ///
		(-8=.b "not part of questionnaire") (-5=.e "not part of this version")(-3=.e "not valid") ///
		(-2=.c "does not apply")(-1=.d "refused"), gen(sat_life)		

lab var worry_econ "Worried about personal economic Situation"
lab var sat_health "Satisfaction with health"
lab var sat_work "Satisfaction with work"
lab var sat_hhinc "Satisfaction with household income"
lab var sat_persinc "Satisfaction with personal income"
lab var sat_leisuretime "Satisfaction with amount of leisure time"
lab var sat_life "Satisfaction with life"

* Second copy of sat_hhinc under the name hhinc_sat (same values and labels).
clonevar hhinc_sat = sat_hhinc
		


*socio-demographics
* Fill sex from the questionnaire item pla0009_v2 where it is system missing.
* NOTE(review): only system missing (.) is filled, and pla0009_v1 is not used
* as a fallback here - confirm that this is intended.
replace sex = 1 if sex==. & pla0009_v2==1
replace sex = 2 if sex==. & pla0009_v2==2

* Dummy: 0 male, 1 female; every other value of sex becomes missing.
recode sex (1=0 "male")(2=1 "female")(else=.), gen(female)

* Labelled copies of employment status and labor force status with the listed
* negative codes turned into extended missing values.
* NOTE(review): -5 and -3 are both mapped to .e with different labels - verify.
recode pgemplst (1=1 "full-time employed")(2=2 "regular part-time")(3=3 "vocational training") ///
		(4=4 "marginal, irregular part-time")(5=5 "not employed")(6=6 "Sheltered workshop/Behindertenwerkstatt") ///
		(-8=.b "not part of questionnaire") (-5=.e "not part of this version")(-3=.e "not valid") ///
		(-2=.c "does not apply")(-1=.d "refused"), gen(emplst)
recode pglfs  (-8=.b "not part of questionnaire") (-5=.e "not part of this version")(-3=.e "not valid") ///
		(-2=.c "does not apply")(-1=.d "refused"), gen(lfst)

lab var emplst "Employment Status"
lab var lfst "Labor Force Status"

* Two dummies from pglfs. unemployed is 1 for code 6 and 0 for codes 1-5 and
* 8-12. nlf is 1 for codes 1-4, 8 and 9 and 0 for codes 5, 6 and 10-12.
* Everything else becomes missing in both.
recode pglfs (6=1 "unemployed")(1/5 8/12=0)(else=.), gen(unemployed)
recode pglfs (1/4 8 9=1 "nlf")(5 6 10 11 12=0)(else=.), gen(nlf)
	
		
* Education: edu keeps the original categories; edu3 pools them into
* low / medium / high and sets codes 5 and 7 to .z.
recode pgpsbil (-8=.b "not part of questionnaire") (-5=.e "not part of this version")(-3=.e "not valid") ///
		(-2=.c "does not apply")(-1=.d "refused"), gen(edu)
recode pgpsbil (1 6 8=1 "low")(2=2 "medium")(3 4=3 "high")(5 7=.z "other/still in school") ///
		(-8=.b "not part of questionnaire") (-5=.e "not part of this version")(-3=.e "not valid") ///
		(-2=.c "does not apply")(-1=.d "refused"), gen(edu3)		

lab var edu "Education"
lab var edu3 "Education (3 categories)"	

* Dummies for medium and high education (the left-out category is low).
recode edu3 (2=1 "med")(1 3 = 0)(else=.), gen(edu_med)	
recode edu3 (3=1 "high")(1 2 = 0)(else=.), gen(edu_high)	

* Age is survey year minus birth year, after setting birth year -1 to missing.
mvdecode gebjahr, mv(-1)
gen age = syear-gebjahr
lab var age "Age (during year of survey)"

* age18 is age centred at 18. age1835 is 1 for ages 13-35 and 0 for 36-120.
* NOTE(review): the lower bound is 13 although the name suggests 18 - confirm.
gen age18 = age-18
recode age (13/35=1)(36/120=0)(else=.), gen(age1835)

* Dummy from germborn: code 1 becomes 0, code 2 becomes 1, all else missing.
recode germborn (1=0 "no migrant")(2=1 "migrant")(else=.), gen(migrant)

* hh size
* Household size is a labelled copy of hhgr.
clonevar hhsize = hhgr
label variable hhsize "Number of People in Household"

* income
* Codes -3, -1 and 999999 of the reported income are set to missing in the
* source variable itself before it is copied to hhinc.
mvdecode hlc0005_h, mv(-3 -1 999999)
clonevar hhinc = hlc0005_h
label variable hhinc "Household Income (monthly, net)"

* Income divided by household size.
generate hhinc_pp = hhinc / hhsize
label variable hhinc_pp "Household Income per person (monthly, net)"

* Income divided by the square root of household size.
generate hhinc_pp_sqrt = hhinc / (sqrt(hhsize))
label variable hhinc_pp_sqrt "Household Income per SQRT person (monthly, net)"

* Deciles of the three income measures, computed over all rows in memory.
xtile hhinc_dec = hhinc, nquantiles(10)
xtile hhinc_dec_pp = hhinc_pp, nquantiles(10)
xtile hhinc_dec_pp_sqrt = hhinc_pp_sqrt, nquantiles(10)

* Deciles 1-10 rescaled to the range 0-10.
generate hhinc_dec10 = (hhinc_dec - 1) * (10/9)
generate hhinc_dec_pp10 = (hhinc_dec_pp - 1) * (10/9)
generate hhinc_dec_pp_sqrt10 = (hhinc_dec_pp_sqrt - 1) * (10/9)

*imputed income
* Row mean of the five imputed income variables, divided by the square root of
* household size, then cut into deciles.
egen hhinc_imp = rowmean(hgi1hinc hgi2hinc hgi3hinc hgi4hinc hgi5hinc)
generate eqinc_imp = hhinc_imp / (sqrt(hhsize))
xtile eqinc_imp_dec = eqinc_imp, nquantiles(10)

* Variable labels for the identifiers and basic person attributes.
label variable pid   "Unique Person-ID"
label variable hid   "Unique Household-ID"
label variable cid   "Original Household-ID"
label variable syear "Survey Year"
label variable fnr   "Father-ID"
label variable mnr   "Mother-ID"
label variable sex   "Sex"


*save
* Final selection: the leading identifier columns (pid through syear), the two
* merge keys, sex, vote, and everything from ufnr to the last generated
* variable. The ranges depend on the column order in memory.
* NOTE(review): fnr and mnr are labelled above but appear to fall outside
* these ranges - confirm they are meant to be dropped.
keep pid-syear upid uhid sex vote ufnr-eqinc_imp_dec
save "$MY_PATH_OUT\soep_long.dta", replace

* Remove the intermediate extracts this script wrote to MY_PATH_OUT.
* soep.dta and soep_long.dta are kept, as before.
erase "$MY_PATH_OUT\pl.dta"
erase "$MY_PATH_OUT\pgen.dta"
erase "$MY_PATH_OUT\health.dta"
erase "$MY_PATH_OUT\migrant.dta"
erase "$MY_PATH_OUT\bioparen.dta"
erase "$MY_PATH_OUT\biobirth.dta"
erase "$MY_PATH_OUT\hl.dta"
erase "$MY_PATH_OUT\hbrutto.dta"
* These four extracts are also written above but were previously left behind.
erase "$MY_PATH_OUT\hgen.dta"
erase "$MY_PATH_OUT\vote2009.dta"
erase "$MY_PATH_OUT\vote2013.dta"
erase "$MY_PATH_OUT\vote2017.dta"













